import requests
from transformers import AutoTokenizer

# Smoke test: send one chat message to the chat-completions endpoint served on
# localhost:8000 and print whatever comes back.

# Single source of truth for the model location; it is used both to load the
# local tokenizer and as the "model" field of the request.
MODEL_PATH = "/data1/jzh/models/Llama-2-7B-fp16"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever when the server is down or stalls mid-response.
REQUEST_TIMEOUT = (5, 60)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
url = "http://localhost:8000/v1/chat/completions"
headers = {"Content-Type": "application/json"}
data = {
    "model": MODEL_PATH,
    "messages": [
        {
            "role": "user",
            "content": "你好，世界！"
        }
    ],
    "max_tokens": 10,
    "temperature": 0.7
}

# NOTE(review): this template is attached to the *local* tokenizer object only.
# It is not part of `data`, so the request below does not carry it; presumably
# the server must be configured with its own template -- confirm.
# NOTE(review): both conditions inspect messages[0], so a conversation that
# starts with a system message renders no user text. The literal also begins
# with a newline before "<s>". Left unchanged because it is only printed here.
tokenizer.chat_template = """
<s>[INST] {% if messages[0]["role"] == "system" %}
<<SYS>>
{{ messages[0]["content"] }}
<</SYS>>
{% endif %}
{% if messages[0]["role"] == "user" %}{{ messages[0]["content"] }}{% endif %}
[/INST]"""

print(tokenizer.chat_template)

response = requests.post(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)
if not response.ok:
    # Make the failure visible even when the error body itself is unhelpful.
    print(f"request failed with HTTP {response.status_code}")

try:
    payload = response.json()
except ValueError:
    # The body was not JSON (e.g. an HTML error page or an empty reply); show
    # the raw text instead of crashing with a decode error.
    payload = response.text
print(payload)